
# This file creates data for parents who are twenty years older than those in main analyses --------------------

# install.packages("foreign")
# install.packages("dplyr")
# install.packages("date")

library(foreign)
library(dplyr)
library(date)

# Read the Stata extract (the working directory must already point at the
# folder holding the file).
raw_data <- read.dta("stata_data.dta")

# Numeric copy of the birth date; all date comparisons below use this column.
raw_data[["date"]] <- as.numeric(raw_data[["FOED_DAG"]])

# Assign a birth order to each child of one parent.
#
# Arguments:
#   date         Numeric birth dates (in days) of one parent's children. The
#                callers in this file sort by date before calling.
#   twin_window  A child born fewer than this many days after the preceding
#                child is treated as part of the same birth (default 8).
#
# Returns an integer vector the same length as `date`. A child's birth order is
# its position in `date`, unless it falls inside the twin window of the
# preceding child, in which case it inherits that child's birth order. Orders
# are therefore not compacted: after twins at positions 1 and 2 (both order 1)
# the next child gets order 3.
birth_order_func <-
  function(date, twin_window = 8) {
    birth_order <- rep(NA_integer_, length(date))
    # seq_along() yields no iterations for empty input, unlike 1:length(date),
    # which would iterate over c(1, 0) and return a length-1 result.
    for (i in seq_along(date)) {
      birth_order[i] <- i
      # Compare with the immediately preceding child only.
      if (i > 1 && (date[i] - date[i - 1]) < twin_window) {
        birth_order[i] <- birth_order[i - 1]
      }
    }
    birth_order
  }

# Children with a recorded mother id: number each mother's births in date
# order, then count how many children share the same mother and birth order.
# A count above one marks a multiple birth. The result stays grouped by
# (mor_id, birth_order).
twin_mom <-
  raw_data %>%
  filter(mor_id != "") %>%
  group_by(mor_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date)) %>%
  group_by(mor_id, birth_order) %>%
  mutate(twin = n())

# Build the fathers' child data from two sources:
#   1. children with both a mother id and a father id (taken from twin_mom), and
#   2. children with a missing mother id but a recorded father id.

# 1. Children from twin_mom with a non-missing father id. Their birth_order and
#    twin columns were computed within mother and are carried over unchanged.
#    NOTE(review): for a father with children by more than one mother these are
#    per-mother birth orders - confirm this is intended.
twin_dad <-
  twin_mom %>%
  ungroup() %>%
  filter(far_id != "")

# 2. Children with missing mother id: birth order and twin count are computed
#    within father.
twin_dad_2 <-
  raw_data %>%
  filter(mor_id == "" & far_id != "") %>%
  group_by(far_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date)) %>%
  group_by(far_id, birth_order) %>%
  mutate(twin = n()) %>%
  ungroup()

# Stack the two father frames. Both are ungrouped first: the original frames
# were grouped by different variables, and base rbind() does not rebuild
# dplyr's grouping metadata, whereas bind_rows() on plain tibbles is safe.
twin_dad <-
  bind_rows(twin_dad, twin_dad_2)


# Flag children who are part of a multiple birth (twin > 1) at birth order 1
# (twin_first) or birth order 2 (twin_second). Grouping is dropped by
# converting to a plain data frame first.

twin_mom <- as.data.frame(twin_mom)
twin_mom[["twin_first"]]  <- with(twin_mom, twin > 1 & birth_order == 1)
twin_mom[["twin_second"]] <- with(twin_mom, twin > 1 & birth_order == 2)

twin_dad <- as.data.frame(twin_dad)
twin_dad[["twin_first"]]  <- with(twin_dad, twin > 1 & birth_order == 1)
twin_dad[["twin_second"]] <- with(twin_dad, twin > 1 & birth_order == 2)

# Cutoff and election dates, moved twenty years back -----------------------
# All values are day counts on the same scale as `date`. Each main-analysis
# value is shifted back by 20 years: 20 times 365 days plus five leap days.
# NOTE(review): counted from 1970-01-01 the shifted birth cutoffs are
# Jan 1, 1974 and Jan 1, 1978 - confirm the epoch of FOED_DAG.
shift_20_years <- 20 * 365 + 5

# numeric values for the birth-date cutoffs
date09 <- 8766  - shift_20_years
date13 <- 10227 - shift_20_years

# numeric values for the election days
date09_election <- 14565 - shift_20_years
date13_election <- 16028 - shift_20_years
date14_election <- 16216 - shift_20_years
  
  
# Aggregate children to one row per mother.
# - Keep only children born on or after the (shifted) date09 cutoff.
# - Use the mother id as pnr so the result can be merged onto turnout data.
# - Per mother: number of first-birth twins, number of first-borns born on or
#   after each cutoff (used to condition on later), number of children born
#   before each election day, and the earliest birth date among kept children.

twin_mom_agg <-
  twin_mom %>%
  filter(date >= date09) %>%
  group_by(pnr = mor_id) %>%
  summarise(
    twin_first     = sum(twin_first),
    first09        = sum(birth_order == 1 & date >= date09),
    first13        = sum(birth_order == 1 & date >= date13),
    no_children_09 = sum(date < date09_election),
    no_children_13 = sum(date < date13_election),
    no_children_14 = sum(date < date14_election),
    age_oldest     = min(date)
  )

# Women's own id, turnout variables and birth date, with the per-mother child
# summary merged on by pnr. Women without a row in twin_mom_agg get NA in the
# child columns.
# NOTE(review): rows are drawn from twin_mom, i.e. only persons who themselves
# have a recorded mother id - confirm this restriction is intended.
mothers <-
  twin_mom %>%
  filter(female == 1) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_mom_agg, by = "pnr")

# Same aggregation for fathers ---------------------------------------------
# One row per father (pnr = father id): number of first-birth twins, number of
# first-borns born on or after each cutoff, number of children born before each
# election day, and the earliest birth date among kept children.

twin_dad_agg <- 
  as.data.frame(twin_dad) %>%
  # Inclusive cutoff (>=), matching the mothers' filter and the first09
  # definition below; the previous strict `>` dropped children born exactly
  # on the cutoff day for fathers only.
  filter(date >= date09) %>%
  mutate(first09 = birth_order == 1 & date >= date09,
         first13 = birth_order == 1 & date >= date13,
         pnr     = far_id) %>%
  group_by(pnr) %>%
  summarise(twin_first = sum(twin_first),
            first09    = sum(first09),
            first13    = sum(first13),
            no_children_09 = sum(date < date09_election),
            no_children_13 = sum(date < date13_election),
            no_children_14 = sum(date < date14_election),
            age_oldest   = min(date)) %>%
  select(pnr, twin_first, first09, first13, no_children_09, 
         no_children_13, no_children_14, age_oldest)

# Men's own id, turnout variables and birth date, with the per-father child
# summary merged on by pnr. Men without a row in twin_dad_agg get NA in the
# child columns.
# NOTE(review): rows are drawn from twin_dad, i.e. only persons who themselves
# have a recorded father id - confirm this restriction is intended.
fathers <-
  twin_dad %>%
  filter(female == 0) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_dad_agg, by = "pnr")


# Write each parent data set to its own .rdata file in the working directory.
save(list = "mothers", file = "mothers_old_20.rdata")
save(list = "fathers", file = "fathers_old_20.rdata")
